#' ---
#' title: "Main Analysis (Exploring Mechanism)"
#' author: "Gento Kato & Fan Lu"
#' date: "June 29, 2023"
#' ---
#' 
#' 
#' # Preparation 
#' 

## Clean Up Space
rm(list=ls())

## Set Working Directory (Automatically) ##
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)); 

require(interflex)
require(estimatr)
require(texreg)
require(ggplot2)
require(ggrepel)
require(haven)
require(sandwich)
require(lmtest)

survey22 <- readRDS("data_survey22_v7.rds")
icpp09 <- readRDS("data_icpp09_v7.rds")

cfmap1 <- list("(Intercept)"="(Intercept)",
               "edu2_parent"="University-educated Parents",
               "cohortCohort II (18+ in 1976-1989)"="Cohort II (18+ in 1975-89)",
               "cohortCohort III (18+ in 1990-99)"="Cohort III (18+ in 1990-99)",
               "cohortCohort IV (18+ in 2000-09)"="Cohort IV (18+ in 2000-09)",
               "cohortCohort V (18+ in 2010-)"="Cohort V (18+ in 2010-)",
               "edu2_parent:cohortCohort II (18+ in 1976-1989)"="University (Parents) * Cohort II",
               "edu2_parent:cohortCohort III (18+ in 1990-99)"="University (Parents) * Cohort III",
               "edu2_parent:cohortCohort IV (18+ in 2000-09)"="University (Parents) * Cohort IV",
               "edu2_parent:cohortCohort V (18+ in 2010-)"="University (Parents) * Cohort V",
               "male"="Gender (Male)", 
               "cohortCohort II (18+ in 1976-1989):male"="Male * Cohort II",
               "cohortCohort III (18+ in 1990-99):male"="Male * Cohort III",
               "cohortCohort IV (18+ in 2000-09):male"="Male * Cohort IV",
               "cohortCohort V (18+ in 2010-):male"="Male * Cohort V",
               "urban_home2"="Hometown in City",
               "cohortCohort II (18+ in 1976-1989):urban_home2"="Hometown (City) * Cohort II",
               "cohortCohort III (18+ in 1990-99):urban_home2"="Hometown (City) * Cohort III",
               "cohortCohort IV (18+ in 2000-09):urban_home2"="Hometown (City) * Cohort IV",
               "cohortCohort V (18+ in 2010-):urban_home2"="Hometown (City) * Cohort V"
)

cfmap <- list("(Intercept)"="(Intercept)",
              "edu2"="University Education",
              "cohortCohort II (18+ in 1976-1989)"="Cohort II (18+ in 1975-89)",
              "cohortCohort III (18+ in 1990-99)"="Cohort III (18+ in 1990-99)",
              "cohortCohort IV (18+ in 2000-09)"="Cohort IV (18+ in 2000-09)",
              "cohortCohort V (18+ in 2010-)"="Cohort V (18+ in 2010-)",
              "edu2:cohortCohort II (18+ in 1976-1989)"="University * Cohort II",
              "edu2:cohortCohort III (18+ in 1990-99)"="University * Cohort III",
              "edu2:cohortCohort IV (18+ in 2000-09)"="University * Cohort IV",
              "edu2:cohortCohort V (18+ in 2010-)"="University * Cohort V",
              "male"="Gender (Male)", 
              "cohortCohort II (18+ in 1976-1989):male"="Male * Cohort II",
              "cohortCohort III (18+ in 1990-99):male"="Male * Cohort III",
              "cohortCohort IV (18+ in 2000-09):male"="Male * Cohort IV",
              "cohortCohort V (18+ in 2010-):male"="Male * Cohort V",
              "incomecatMiddle" = "Income (Middle)",
              "cohortCohort II (18+ in 1976-1989):incomecatMiddle"="Income (Middle) * Cohort II",
              "cohortCohort III (18+ in 1990-99):incomecatMiddle"="Income (Middle) * Cohort III",
              "cohortCohort IV (18+ in 2000-09):incomecatMiddle"="Income (Middle) * Cohort IV",
              "cohortCohort V (18+ in 2010-):incomecatMiddle"="Income (Middle) * Cohort V",
              "incomecatHigh" = "Income (High)",
              "cohortCohort II (18+ in 1976-1989):incomecatHigh"="Income (High) * Cohort II",
              "cohortCohort III (18+ in 1990-99):incomecatHigh"="Income (High) * Cohort III",
              "cohortCohort IV (18+ in 2000-09):incomecatHigh"="Income (High) * Cohort IV",
              "cohortCohort V (18+ in 2010-):incomecatHigh"="Income (High) * Cohort V",
              "incomecatMissing" = "Income (Missing)",
              "cohortCohort II (18+ in 1976-1989):incomecatMissing"="Income (Missing) * Cohort II",
              "cohortCohort III (18+ in 1990-99):incomecatMissing"="Income (Missing) * Cohort III",
              "cohortCohort IV (18+ in 2000-09):incomecatMissing"="Income (Missing) * Cohort IV",
              "cohortCohort V (18+ in 2010-):incomecatMissing"="Income (Missing) * Cohort V",
              "workstatStudent/Housemaker/Part-Time" = 
                "Student/Housemaker/Part-Time",
              "cohortCohort II (18+ in 1976-1989):workstatStudent/Housemaker/Part-Time"=
                "Student/Housemaker/Part-Time * Cohort II",
              "cohortCohort III (18+ in 1990-99):workstatStudent/Housemaker/Part-Time"=
                "Student/Housemaker/Part-Time * Cohort III",
              "cohortCohort IV (18+ in 2000-09):workstatStudent/Housemaker/Part-Time"=
                "Student/Housemaker/Part-Time * Cohort IV",
              "cohortCohort V (18+ in 2010-):workstatStudent/Housemaker/Part-Time"=
                "Student/Housemaker/Part-Time * Cohort V",
              "workstatStudent/Part-Time" = 
                "Student/Part-Time",
              "cohortCohort II (18+ in 1976-1989):workstatStudent/Part-Time"=
                "Student/Part-Time * Cohort II",
              "cohortCohort III (18+ in 1990-99):workstatStudent/Part-Time"=
                "Student/Part-Time * Cohort III",
              "cohortCohort IV (18+ in 2000-09):workstatStudent/Part-Time"=
                "Student/Part-Time * Cohort IV",
              "cohortCohort V (18+ in 2010-):workstatStudent/Part-Time"=
                "Student/Part-Time * Cohort V",
              "workstatSelf-Employed/Full-Time/Managerial" = 
                "Self-Employed/Full-Time",
              "cohortCohort II (18+ in 1976-1989):workstatSelf-Employed/Full-Time/Managerial"=
                "Self-Employed/Full-Time * Cohort II",
              "cohortCohort III (18+ in 1990-99):workstatSelf-Employed/Full-Time/Managerial"=
                "Self-Employed/Full-Time * Cohort III",
              "cohortCohort IV (18+ in 2000-09):workstatSelf-Employed/Full-Time/Managerial"=
                "Self-Employed/Full-Time * Cohort IV",
              "cohortCohort V (18+ in 2010-):workstatSelf-Employed/Full-Time/Managerial"=
                "Self-Employed/Full-Time * Cohort V",
              "married"="Currently Married",
              "cohortCohort II (18+ in 1976-1989):married"="Married * Cohort II",
              "cohortCohort III (18+ in 1990-99):married"="Married * Cohort III",
              "cohortCohort IV (18+ in 2000-09):married"="Married * Cohort IV",
              "cohortCohort V (18+ in 2010-):married"="Married * Cohort V",
              "urban"="Urbanness (Current Residence)",
              "cohortCohort II (18+ in 1976-1989):urban"="Urbanness * Cohort II",
              "cohortCohort III (18+ in 1990-99):urban"="Urbanness * Cohort III",
              "cohortCohort IV (18+ in 2000-09):urban"="Urbanness * Cohort IV",
              "cohortCohort V (18+ in 2010-):urban"="Urbanness * Cohort V")

#'
#' # Analysis 1: Self Selection (Figure 3, Table A7)
#'

#+
## Model of Self-Selection
mS_survey22 <- glm(edu2~
                     edu2_parent*cohort+
                     male*cohort+
                     urban_home2*cohort, 
                   data=survey22, family=binomial("logit"))
out_mS_survey22 <- coeftest(mS_survey22, vcov.=vcovHC(mS_survey22, "HC1"))

screenreg(list(mS_survey22), 
          digits=3, single.row=TRUE,
          override.se = list(out_mS_survey22[,2]),
          override.pvalues = list(out_mS_survey22[,4]),
          custom.model.names = c("University Education"),
          custom.coef.map = cfmap1)
texreg(list(mS_survey22), 
       digits=3, single.row=TRUE,
       override.se = list(out_mS_survey22[,2]),
       override.pvalues = list(out_mS_survey22[,4]),
       custom.model.names = c("University Education"),
       custom.coef.map = cfmap1, 
       custom.note = "%stars. Robust standard errors in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table=FALSE,
       file = "mtab_selection_survey22.tex")

### For plotting:  
mSx1b_survey22 <- interflex(Y = "edu2", D = "edu2_parent", X = "univyr",
                           Z = c("male", "urban_home2"), method="logit",
                           full.moderate = TRUE, 
                           data = survey22, na.rm=TRUE, 
                           estimator = "binning", cutoffs = c(1989,1999,2009),
                           vcov.type = "robust", theme.bw=TRUE)
mSx1b_survey22$figure

mSx2b_survey22 <- interflex(Y = "edu2", D = "male", X = "univyr",
                           Z = c("edu2_parent", "urban_home2"), method="logit",
                           full.moderate = TRUE,
                           data = survey22, na.rm=TRUE, 
                           estimator = "binning", cutoffs = c(1989,1999,2009),
                           vcov.type = "robust", theme.bw=TRUE)
mSx2b_survey22$figure

mSx3b_survey22 <- interflex(Y = "edu2", D = "urban_home2", X = "univyr", 
                           Z = c("edu2_parent","male"), method="logit",
                           full.moderate = TRUE,
                           data = survey22, na.rm=TRUE, 
                           estimator = "binning", cutoffs = c(1989,1999,2009),
                           vcov.type = "robust", theme.bw=TRUE)
mSx3b_survey22$figure

tmp1 <- as.data.frame(mSx1b_survey22$est.bin$`1`)
tmp1$iv <- "University-educated\nParents"
tmp2 <- as.data.frame(mSx2b_survey22$est.bin$`1`)
tmp2$iv <- "Male"
tmp3 <- as.data.frame(mSx3b_survey22$est.bin$`1`)
tmp3$iv <- "Hometown in City"
mS_plot_survey22 <- rbind(tmp1,tmp2,tmp3)
colnames(mS_plot_survey22) <- c("x0","est","sd","lci","uci","iv")
mS_plot_survey22$x <- c(1976,1994.5,2004.5,2016)
mS_plot_survey22$lb <- factor(c("I & II","III","IV","V"), 
                              levels=c("I & II","III","IV","V"))
mS_plot_survey22$iv <- 
  factor(mS_plot_survey22$iv,levels=unique(mS_plot_survey22$iv))

set.seed(12345)
mSx1_survey22 <- interflex(Y = "edu2", D = "edu2_parent", X = "univyr",
                           Z = c("male", "urban_home2"), method="logit",
                           full.moderate = TRUE,
                           data = survey22, na.rm=TRUE, estimator = "kernel",
                           bw=5, #CI=FALSE,
                           nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mSx1_survey22$figure

set.seed(12345)
mSx2_survey22 <- interflex(Y = "edu2", D = "male", X = "univyr",
                           Z = c("edu2_parent", "urban_home2"), method="logit",
                           full.moderate = TRUE,
                           data = survey22, na.rm=TRUE, estimator = "kernel",
                           bw=5, #CI=FALSE,
                           nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mSx2_survey22$figure

set.seed(12345)
mSx3_survey22 <- interflex(Y = "edu2", D = "urban_home2", X = "univyr",
                           Z = c("edu2_parent","male"), method="logit",
                           full.moderate = TRUE,
                           data = survey22, na.rm=TRUE, estimator = "kernel",
                           bw=5, #CI=FALSE,
                           nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mSx3_survey22$figure

# save(mSx1_survey22, mSx2_survey22, mSx3_survey22, file="mx_selection_survey22.RData")
# load("mx_selection_survey22.RData")

tmp1 <- as.data.frame(mSx1_survey22$est.kernel$`1`)
tmp1$iv <- "University-educated\nParents"
tmp2 <- as.data.frame(mSx2_survey22$est.kernel$`1`)
tmp2$iv <- "Male"
tmp3 <- as.data.frame(mSx3_survey22$est.kernel$`1`)
tmp3$iv <- "Hometown in City"
out <- rbind(tmp1,tmp2,tmp3)
out$iv <- factor(out$iv,levels=unique(out$iv))
out
tmp1 <- data.frame(x=mSx1_survey22$hist.out$mids,
                   y=c(mSx1_survey22$count.tr$`0`,mSx1_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mSx1_survey22$hist.out$mids)))
tmp1$iv <- "University-educated\nParents"
tmp2 <- data.frame(x=mSx2_survey22$hist.out$mids,
                   y=c(mSx2_survey22$count.tr$`0`,mSx2_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mSx2_survey22$hist.out$mids)))
tmp2$iv <- "Male"
tmp3 <- data.frame(x=mSx3_survey22$hist.out$mids,
                   y=c(mSx3_survey22$count.tr$`0`,mSx3_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mSx3_survey22$hist.out$mids)))
tmp3$iv <- "Hometown in City"
outh <- rbind(tmp1,tmp2,tmp3)
outh$iv <- factor(outh$iv,levels=unique(outh$iv))
outh$tr <- factor(outh$tr, levels=c("1","0"))

#+ fig.width=7, fig.height=4
## Figure
movementS <- 0.15
ggplot(out) + 
  geom_vline(aes(xintercept=1975), linetype=2, color="gray80") + 
  geom_vline(aes(xintercept=1990), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2000), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2010), linetype=1, color="gray80") + 
  geom_col(data=outh, aes(x=x,y=y*0.0008,fill=tr), width=0.8) + 
  geom_hline(aes(yintercept=0+movementS), linetype=2, color="gray50") + 
  geom_ribbon(aes(x=X,ymin=`lower CI(95%)`+movementS,
                  ymax=`upper CI(95%)`+movementS),alpha=0.3) +
  geom_line(aes(x=X,y=TE+movementS)) +
  # geom_line(aes(x=X, y=`lower CI(95%)`+movementS),alpha=0.7, linetype=2) +
  # geom_line(aes(x=X, y=`upper CI(95%)`+movementS),alpha=0.7, linetype=2) +
  geom_errorbar(data=mS_plot_survey22, aes(x=x, ymin=lci+movementS,ymax=uci+movementS), alpha=0.8, width=2) +
  geom_point(data=mS_plot_survey22, aes(x=x, y=est+movementS), size=1.5, color="gray10", alpha=0.8) +
  geom_text(data=mS_plot_survey22, aes(x=x, y=-0.17+movementS, label=lb), family="serif") + 
  facet_grid(.~iv) + 
  scale_fill_manual(values=c("black","gray70")) + 
  scale_x_continuous(breaks=c(1975,1990,2000,2010)) +
  scale_y_continuous(labels = function(y) round(y - 0.1,2)) +
  coord_cartesian(ylim=c(-0.02,0.7)) + 
  labs(x = "Year of Turning 19 (Typical Year of Entering University)",
       y = "Conditional Marginal Effect (Logit)\nwith 95% Confidence Interval",
       caption = paste0("Histogram indicates the frequency of cases (black = treated, gray = not).")) +
  theme_classic(base_family="serif") + 
  theme(plot.title = element_text(hjust=0.5),
        panel.background = element_rect(color="black", size=1),
        strip.text = element_text(size=11, face="bold"),
        legend.position="none")

ggsave("mx_selection_cplot_survey22.pdf", width=7, height=4)
ggsave("mx_selection_cplot_survey22.png", width=7, height=4)

#'
#' # Analysis 2: Socialization (Figure 4, Table A8)
#'

#+
## Models of Socialization

## Models with Categorical Interaction
mA_survey22 <- lm_robust(foreigncontact~edu2*cohort+
                           male*cohort+
                           incomecat*cohort+
                           workstat*cohort+
                           married*cohort+
                           urban*cohort, 
                         data=survey22, se_type="stata")
mB_survey22 <- lm_robust(famrights~edu2*cohort+
                           male*cohort+
                           incomecat*cohort+
                           workstat*cohort+
                           married*cohort+
                           urban*cohort, 
                         data=survey22, se_type="stata")
screenreg(list(mA_survey22,mB_survey22), 
          digits=3, include.ci=FALSE, single.row=TRUE,
          custom.model.names = c("Contact with \nForeigners",
                                 "Support Separate Surnames/\nSame-sex Marriage"),
          custom.coef.map = cfmap)

mA_edu2_survey22 <- 
  as.data.frame(  rbind(
    summary(lm_robust(foreigncontact~edu2*relevel(cohort,levels(cohort)[1])+
                        male*relevel(cohort,levels(cohort)[1])+
                        incomecat*relevel(cohort,levels(cohort)[1])+
                        workstat*relevel(cohort,levels(cohort)[1])+
                        married*relevel(cohort,levels(cohort)[1])+
                        urban*relevel(cohort,levels(cohort)[1]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact~edu2*relevel(cohort,levels(cohort)[2])+
                        male*relevel(cohort,levels(cohort)[2])+
                        incomecat*relevel(cohort,levels(cohort)[2])+
                        workstat*relevel(cohort,levels(cohort)[2])+
                        married*relevel(cohort,levels(cohort)[2])+
                        urban*relevel(cohort,levels(cohort)[2]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact~edu2*relevel(cohort,levels(cohort)[3])+
                        male*relevel(cohort,levels(cohort)[3])+
                        incomecat*relevel(cohort,levels(cohort)[3])+
                        workstat*relevel(cohort,levels(cohort)[3])+
                        married*relevel(cohort,levels(cohort)[3])+
                        urban*relevel(cohort,levels(cohort)[3]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact~edu2*relevel(cohort,levels(cohort)[4])+
                        male*relevel(cohort,levels(cohort)[4])+
                        incomecat*relevel(cohort,levels(cohort)[4])+
                        workstat*relevel(cohort,levels(cohort)[4])+
                        married*relevel(cohort,levels(cohort)[4])+
                        urban*relevel(cohort,levels(cohort)[4]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)]
  )  )
colnames(mA_edu2_survey22) <- c("est","p","lci","uci")
mA_edu2_survey22$dv <- "Contact with \nForeigners"
# unique(sort(survey22$univyr))
# median(1963:1989); median(1990:1999); median(2000:2009); median(2010:2022) 
mA_edu2_survey22$x <- c(1976,1994.5,2004.5,2016)
mA_edu2_survey22$lb <- factor(c("I & II","III","IV","V"), levels=c("I & II","III","IV","V"))

mB_edu2_survey22 <- 
  as.data.frame(  rbind(
    summary(lm_robust(famrights~edu2*relevel(cohort,levels(cohort)[1])+
                        male*relevel(cohort,levels(cohort)[1])+
                        incomecat*relevel(cohort,levels(cohort)[1])+
                        workstat*relevel(cohort,levels(cohort)[1])+
                        married*relevel(cohort,levels(cohort)[1])+
                        urban*relevel(cohort,levels(cohort)[1]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(famrights~edu2*relevel(cohort,levels(cohort)[2])+
                        male*relevel(cohort,levels(cohort)[2])+
                        incomecat*relevel(cohort,levels(cohort)[2])+
                        workstat*relevel(cohort,levels(cohort)[2])+
                        married*relevel(cohort,levels(cohort)[2])+
                        urban*relevel(cohort,levels(cohort)[2]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(famrights~edu2*relevel(cohort,levels(cohort)[3])+
                        male*relevel(cohort,levels(cohort)[3])+
                        incomecat*relevel(cohort,levels(cohort)[3])+
                        workstat*relevel(cohort,levels(cohort)[3])+
                        married*relevel(cohort,levels(cohort)[3])+
                        urban*relevel(cohort,levels(cohort)[3]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(famrights~edu2*relevel(cohort,levels(cohort)[4])+
                        male*relevel(cohort,levels(cohort)[4])+
                        incomecat*relevel(cohort,levels(cohort)[4])+
                        workstat*relevel(cohort,levels(cohort)[4])+
                        married*relevel(cohort,levels(cohort)[4])+
                        urban*relevel(cohort,levels(cohort)[4]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)]
  )  )
colnames(mB_edu2_survey22) <- c("est","p","lci","uci")
mB_edu2_survey22$dv <- "Support Separate Surnames/\nSame-sex Marriage"
mB_edu2_survey22$x <- c(1976,1994.5,2004.5,2016)
mB_edu2_survey22$lb <- factor(c("I & II","III","IV","V"), levels=c("I & II","III","IV","V"))

m_edu2_survey22 <- rbind(mA_edu2_survey22,mB_edu2_survey22)
m_edu2_survey22$dv <- factor(m_edu2_survey22$dv,levels=unique(m_edu2_survey22$dv))
m_edu2_survey22$lb <- factor(c("I & II","III","IV","V"), levels=c("I & II","III","IV","V"))
m_edu2_survey22

## Interflex (without background variables)
set.seed(12345)
mxA_survey22 <- interflex(Y = "foreigncontact", D = "edu2", X = "univyr",
                          Z = c("male", "incomecat", "workstat", "married", "urban"),
                          data = survey22, na.rm=TRUE, estimator = "kernel",
                          full.moderate = TRUE, bw=5, #CI=FALSE,
                          nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mxA_survey22$figure
set.seed(12345)
mxB_survey22 <- interflex(Y = "famrights", D = "edu2", X = "univyr",
                          Z = c("male", "incomecat", "workstat", "married", "urban"),
                          data = survey22, na.rm=TRUE, estimator = "kernel",
                          full.moderate = TRUE, bw=5, #CI=FALSE,
                          nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mxB_survey22$figure

# save(mxA_survey22, mxB_survey22, file="mx_socialization_survey22.RData")
# load("mx_socialization_survey22.RData")

tmpA <- as.data.frame(mxA_survey22$est.kernel$`1`)
tmpA$dv <- "Contact with \nForeigners"
tmpB <- as.data.frame(mxB_survey22$est.kernel$`1`)
tmpB$dv <- "Support Separate Surnames/\nSame-sex Marriage"
outA <- tmpA; outB <- tmpB
out <- rbind(tmpA,tmpB)
out$dv <- factor(out$dv,levels=unique(out$dv))

tmpA <- data.frame(x=mxA_survey22$hist.out$mids,
                   y=c(mxA_survey22$count.tr$`0`,mxA_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mxA_survey22$hist.out$mids)))
tmpA$dv <- "Contact with \nForeigners"
tmpB <- data.frame(x=mxB_survey22$hist.out$mids,
                   y=c(mxB_survey22$count.tr$`0`,mxB_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mxB_survey22$hist.out$mids)))
tmpB$dv <- "Support Separate Surnames/\nSame-sex Marriage"
outhA <- tmpA; outhB <- tmpB
outh <- rbind(tmpA,tmpB)
outh$dv <- factor(outh$dv,levels=unique(outh$dv))
outh$tr <- factor(outh$tr, levels=c("1","0"))

#+ fig.width=7, fig.height=4
## Figure

movementA <- 1
pA <- ggplot(outA) + 
  geom_vline(aes(xintercept=1975), linetype=2, color="gray80") + 
  geom_vline(aes(xintercept=1990), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2000), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2010), linetype=1, color="gray80") + 
  geom_col(data=outhA, aes(x=x,y=y*0.004,fill=tr), width=0.8) +
  geom_hline(aes(yintercept=0+movementA), linetype=2, color="gray50") + 
  geom_ribbon(aes(x=X,ymin=`lower CI(95%)`+movementA,
                  ymax=`upper CI(95%)`+movementA),alpha=0.3) +
  geom_line(aes(x=X,y=TE+movementA)) +
  # geom_line(aes(x=X, y=`lower CI(95%)`+movementA),alpha=0.7, linetype=2) +
  # geom_line(aes(x=X, y=`upper CI(95%)`+movementA),alpha=0.7, linetype=2) +
  geom_errorbar(data=mA_edu2_survey22, 
                aes(x=x, ymin=lci+movementA,ymax=uci+movementA), alpha=0.8, width=2) + 
  geom_point(data=mA_edu2_survey22, aes(x=x, y=est+movementA), size=1.5, color="gray10", alpha=0.8) + 
  geom_text(data=mA_edu2_survey22, aes(x=x, y=-1.1+movementA, label=lb), family="serif") + 
  facet_wrap(vars(dv), scales="free_y") + 
  scale_fill_manual(values=c("black","gray70")) + 
  scale_x_continuous(breaks=c(1975,1990,2000,2010)) +
  scale_y_continuous(labels = function(y) round(y - movementA,2)) +
  # coord_cartesian(ylim=c(-0.02,0.3)) + 
  labs(x = "Year of Turning 19 (Typical Year of Entering University)",
       y = "Conditional Coefficient with 95% Confidence Interval",
       caption = paste0("Histogram indicates the frequency of cases (black = university educated, gray = not).")) +
  theme_classic(base_family="serif") + 
  theme(plot.title = element_text(hjust=0.5),
        panel.background = element_rect(color="black", size=1),
        strip.text = element_text(size=11, face="bold"),
        legend.position="none")
pA

movementB <- 0.3
pB <- ggplot(outB) + 
  geom_vline(aes(xintercept=1975), linetype=2, color="gray80") + 
  geom_vline(aes(xintercept=1990), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2000), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2010), linetype=1, color="gray80") + 
  geom_col(data=outhB, aes(x=x,y=y*0.0015,fill=tr), width=0.8) +
  geom_hline(aes(yintercept=0+movementB), linetype=2, color="gray50") + 
  geom_ribbon(aes(x=X,ymin=`lower CI(95%)`+movementB,
                  ymax=`upper CI(95%)`+movementB),alpha=0.3) +
  geom_line(aes(x=X,y=TE+movementB)) +
  # geom_line(aes(x=X, y=`lower CI(95%)`+movementB),alpha=0.7, linetype=2) +
  # geom_line(aes(x=X, y=`upper CI(95%)`+movementB),alpha=0.7, linetype=2) +
  geom_errorbar(data=mB_edu2_survey22, 
                aes(x=x, ymin=lci+movementB,ymax=uci+movementB), alpha=0.8, width=2) + 
  geom_point(data=mB_edu2_survey22, aes(x=x, y=est+movementB), size=1.5, color="gray10", alpha=0.8) + 
  geom_text(data=mB_edu2_survey22, aes(x=x, y=-0.34+movementB, label=lb), family="serif") + 
  facet_wrap(vars(dv), scales="free_y") + 
  scale_fill_manual(values=c("black","gray70")) + 
  scale_x_continuous(breaks=c(1975,1990,2000,2010)) +
  scale_y_continuous(labels = function(y) round(y - movementB,2)) +
  # coord_cartesian(ylim=c(-0.02,0.3)) + 
  labs(x = "Year of Turning 19 (Typical Year of Entering University)",
       y = "Conditional Coefficient with 95% Confidence Interval",
       caption = paste0("Histogram indicates the frequency of cases (black = university educated, gray = not).")) +
  theme_classic(base_family="serif") + 
  theme(plot.title = element_text(hjust=0.5),
        panel.background = element_rect(color="black", size=1),
        strip.text = element_text(size=11, face="bold"),
        legend.position="none")
pB

library(cowplot)
library(grid)
library(gridExtra)
p <- plot_grid(pA + labs(x=NULL,y=NULL,caption=NULL),
               pB + labs(x=NULL,y=NULL,caption=NULL), nrow=1)
#combine using cowplot
y.grob <- textGrob("Conditional Coefficient with 95% Confidence Interval", 
                   gp=gpar(col="black", fontsize=12, fontfamily="serif"), rot=90)
x.grob <- textGrob("Year of Turning 19 (Typical Year of Entering University)", 
                   gp=gpar(col="black", fontsize=12, fontfamily="serif"))
#add to plot
g <- arrangeGrob(p, left = y.grob, bottom = x.grob)

#+ fig.width=7, fig.height=4
grid.arrange(g)

ggsave("mx_socialization_cplot_survey22.pdf", g, width=7, height=4)
ggsave("mx_socialization_cplot_survey22.png", g, width=7, height=4)

#+
## Result Tables ##
screenreg(list(mA_survey22,mB_survey22), 
          digits=3, include.ci=FALSE, single.row=TRUE,
          custom.model.names = c("Contact with Foreigners","Support Equality"),
          custom.coef.map = cfmap, 
          custom.note = "%stars. Robust standard errors in parentheses.")

texreg(list(mA_survey22,mB_survey22), 
       digits=3, include.ci=FALSE, single.row=TRUE,
       custom.model.names = c("Contact with Foreigners","Support Equality"),
       custom.coef.map = cfmap, 
       custom.note = "%stars. Robust standard errors in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table=FALSE,
       file = "mtab_socialization_survey22.tex")

#+
###############################################################
## Model with foreigners as co-student (Figure A7, Table A9) ##
###############################################################

## Models with Categorical Interaction
mcs_survey22 <- lm_robust(foreigncontact_costudent~edu2*cohort+
                           male*cohort+
                           incomecat*cohort+
                           workstat*cohort+
                           married*cohort+
                           urban*cohort, 
                         data=survey22, se_type="stata")
screenreg(list(mcs_survey22), 
          digits=3, include.ci=FALSE, single.row=TRUE,
          custom.model.names = c("Studied Together with Foreigners"),
          custom.coef.map = cfmap)

mcs_edu2_survey22 <- 
  as.data.frame(  rbind(
    summary(lm_robust(foreigncontact_costudent~edu2*relevel(cohort,levels(cohort)[1])+
                        male*relevel(cohort,levels(cohort)[1])+
                        incomecat*relevel(cohort,levels(cohort)[1])+
                        workstat*relevel(cohort,levels(cohort)[1])+
                        married*relevel(cohort,levels(cohort)[1])+
                        urban*relevel(cohort,levels(cohort)[1]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact_costudent~edu2*relevel(cohort,levels(cohort)[2])+
                        male*relevel(cohort,levels(cohort)[2])+
                        incomecat*relevel(cohort,levels(cohort)[2])+
                        workstat*relevel(cohort,levels(cohort)[2])+
                        married*relevel(cohort,levels(cohort)[2])+
                        urban*relevel(cohort,levels(cohort)[2]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact_costudent~edu2*relevel(cohort,levels(cohort)[3])+
                        male*relevel(cohort,levels(cohort)[3])+
                        incomecat*relevel(cohort,levels(cohort)[3])+
                        workstat*relevel(cohort,levels(cohort)[3])+
                        married*relevel(cohort,levels(cohort)[3])+
                        urban*relevel(cohort,levels(cohort)[3]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)],
    summary(lm_robust(foreigncontact_costudent~edu2*relevel(cohort,levels(cohort)[4])+
                        male*relevel(cohort,levels(cohort)[4])+
                        incomecat*relevel(cohort,levels(cohort)[4])+
                        workstat*relevel(cohort,levels(cohort)[4])+
                        married*relevel(cohort,levels(cohort)[4])+
                        urban*relevel(cohort,levels(cohort)[4]), 
                      data=survey22, se_type="stata"))$coefficients[2,c(1,4,5,6)]
  )  )
colnames(mcs_edu2_survey22) <- c("est","p","lci","uci")
mcs_edu2_survey22$dv <- "Studied Together with Foreigners"
# unique(sort(survey22$univyr))
# median(1963:1989); median(1990:1999); median(2000:2009); median(2010:2022) 
mcs_edu2_survey22$x <- c(1976,1994.5,2004.5,2016)
mcs_edu2_survey22$lb <- factor(c("I & II","III","IV","V"), levels=c("I & II","III","IV","V"))
mcs_edu2_survey22$type <- "Marginal Effect of \nUniversity Education"

## Interflex (without background variables)
set.seed(12345)
mcsx1_survey22 <- interflex(Y = "foreigncontact_costudent", D = "edu2", X = "univyr",
                          Z = c("male", "incomecat", "workstat", "married", "urban"),
                          data = survey22, na.rm=TRUE, estimator = "kernel",
                          full.moderate = TRUE, bw=5, #CI=FALSE,
                          nboots = 1000, parallel = TRUE, cores = 3, theme.bw=TRUE)
# mcsx1_survey22$figure

# save(mcsx1_survey22, file="mx_socialization_survey22_costudent.RData")
# load("mx_socialization_survey22_costudent.RData")

tmp1 <- as.data.frame(mcsx1_survey22$est.kernel$`1`)
tmp1$dv <- "Studied Together with Foreigners"
tmp1$type <- "Marginal Effect of \nUniversity Education"
out <- tmp1
out$dv <- factor(out$dv,levels=unique(out$dv))
out

tmp1 <- data.frame(x=mcsx1_survey22$hist.out$mids,
                   y=c(mcsx1_survey22$count.tr$`0`,mcsx1_survey22$count.tr$`1`),
                   tr=rep(c("0","1"), each=length(mcsx1_survey22$hist.out$mids)))
tmp1$dv <- "Studied Together with Foreigners"
tmp1$type <- "Marginal Effect of \nUniversity Education"
outh <- tmp1
outh$dv <- factor(outh$dv,levels=unique(outh$dv))
outh$tr <- factor(outh$tr, levels=c("1","0"))

#+ fig.width=7, fig.height=4
## Figure
movementS <- 0.1
pme <- ggplot(out) + 
  geom_vline(aes(xintercept=1975), linetype=2, color="gray80") + 
  geom_vline(aes(xintercept=1990), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2000), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2010), linetype=1, color="gray80") + 
  geom_col(data=outh, aes(x=x,y=y*0.0008,fill=tr), width=0.8) + 
  geom_hline(aes(yintercept=0+movementS), linetype=2, color="gray50") + 
  geom_ribbon(aes(x=X,ymin=`lower CI(95%)`+movementS,
                  ymax=`upper CI(95%)`+movementS),alpha=0.3) +
  geom_line(aes(x=X,y=TE+movementS)) +
  # geom_line(aes(x=X, y=`lower CI(95%)`+movementS),alpha=0.7, linetype=2) +
  # geom_line(aes(x=X, y=`upper CI(95%)`+movementS),alpha=0.7, linetype=2) +
  geom_errorbar(data=mcs_edu2_survey22, aes(x=x, ymin=lci+movementS,ymax=uci+movementS), alpha=0.8, width=2) +
  geom_point(data=mcs_edu2_survey22, aes(x=x, y=est+movementS), size=1.5, color="gray10", alpha=0.8) +
  geom_text(data=mcs_edu2_survey22, aes(x=x, y=-0.115+movementS, label=lb), family="serif") + 
  facet_grid(.~type) + 
  scale_fill_manual(values=c("black","gray70")) + 
  scale_x_continuous(breaks=c(1975,1990,2000,2010)) +
  scale_y_continuous(labels = function(y) round(y - 0.1,2)) +
  coord_cartesian(ylim=c(-0.02,0.35)) + 
  labs(x = "Year of Turning 19 (Typical Year of Entering University)",
       y = "Conditional Coefficient with 95% Confidence Interval",
       caption = paste0("Histogram indicates the frequency of cases\n(black = treated, gray = not).")) +
  theme_classic(base_family="serif") + 
  theme(plot.title = element_text(hjust=0.5),
        panel.background = element_rect(color="black", size=1),
        strip.text = element_text(size=11, face="bold"),
        legend.position="none")
pme

# ggsave("mx_socialization_costudent_cplot_survey22.pdf", width=7, height=4)

###########################################################
## Simulated Values of Learning Together with Foreigners ##
###########################################################

## Simulation Data
simdt <- na.omit(survey22[,all.vars(mcs_survey22$terms)])
table(simdt$cohort, simdt$edu2)

## Simulation Data
simvals <- data.frame(edu2 = rep(c(0,1), each=4),
                      cohort = levels(simdt$cohort))

## Simulation Result
simres <- as.data.frame(t(sapply(1:nrow(simvals), function(i) {
  tmpdt <- simdt
  tmpdt$edu2 <- simvals$edu2[i]
  tmpdt$cohort <- simvals$cohort[i]
  colMeans(as.data.frame(predict(mcs_survey22, newdata = tmpdt, se.fit = TRUE)))
  })))
simres <- cbind(simvals, simres)
simres$cohort <- factor(simres$cohort, levels(simdt$cohort))
simres$x <- c(1976,1994.5,2004.5,2016)
simres$lb <- factor(c("I & II","III","IV","V"), levels=c("I & II","III","IV","V"))
simres$lci <- simres$fit - qnorm(0.975)*simres$se.fit
simres$uci <- simres$fit + qnorm(0.975)*simres$se.fit
simres$mod <- as.factor(ifelse(simres$edu2==0, 
                               "Non-University Graduates", 
                               "University Graduates"))
simres$type <- "Predicted Probability of \nHaving Studied Together with Foreigners"

outhx <- outh
outhx$type <- "Predicted Probability of \nHaving Studied Together with Foreigners"

#+ fig.width=7, fig.height=4
## Figure
movementSx <- 0.1
ppp <- ggplot(simres) + 
  geom_vline(aes(xintercept=1975), linetype=2, color="gray80") + 
  geom_vline(aes(xintercept=1990), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2000), linetype=1, color="gray80") + 
  geom_vline(aes(xintercept=2010), linetype=1, color="gray80") + 
  geom_col(data=outhx, aes(x=x,y=y*0.002,fill=tr), width=0.8, show.legend = FALSE) + 
  geom_errorbar(aes(x=x, ymin=lci+movementSx,ymax=uci+movementSx, linetype=mod), 
                alpha=0.8, width=2, position = position_dodge(width=2)) +
  geom_point(aes(x=x, y=fit+movementSx, shape=mod), size=1.5, 
             color="gray10", alpha=0.8, position = position_dodge(width=2)) +
  geom_text(aes(x=x, y=-0.15+movementSx, label=lb), family="serif") + 
  facet_grid(.~type) + 
  scale_shape_discrete(name="") + scale_linetype_discrete(name="") + 
  scale_fill_manual(values=c("black","gray70")) + 
  scale_x_continuous(breaks=c(1975,1990,2000,2010)) +
  scale_y_continuous(breaks = c(0, 0.25,0.5,0.75) + movementSx, 
                     labels = function(y) round(y - movementSx,2)) +
  # coord_cartesian(ylim=c(-0.02,0.35)) + 
  labs(x = "Year of Turning 19 (Typical Year of Entering University)",
       y = "Predicted Probability with 95% Confidence Interval",
       caption = paste0("Predicted probabilities are calculated fixing\nall covariates at observed values and then taking average.")) +
  theme_classic(base_family="serif") + 
  theme(plot.title = element_text(hjust=0.5),
        panel.background = element_rect(color="black", size=1),
        strip.text = element_text(size=11, face="bold"),
        legend.background = element_blank(),
        legend.position=c(0.32,0.9)) 
ppp

# ggsave("mx_socialization_costudent_cplot_survey22.pdf", width=7, height=4)

library(cowplot)
library(grid)
library(gridExtra)
p <- plot_grid(ppp + labs(x=NULL),
               pme + labs(x=NULL), nrow=1)
#combine using cowplot
x.grob <- textGrob("Year of Turning 19 (Typical Year of Entering University)", 
                   gp=gpar(col="black", fontsize=12, fontfamily="serif"))
#add to plot
g <- arrangeGrob(p, bottom = x.grob)

#+ fig.width=7, fig.height=4
grid.arrange(g)

ggsave("mx_socialization_costudent_cplot_survey22.pdf", g, width=7, height=4)

